//markov.c
/*
Honghe
*/

# include <errno.h>
# include <limits.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <time.h>
# include "uthash.h"
# include "ArrayList.h"

#define BUF_MAX 50  // size of the char array that holds a single word

// One entry of the global word dictionary (a uthash hash table).
struct struct_dict {
    char *key;          // key string; add_word() stores a heap-allocated copy
    st_strlist *value;  // list of strings that have been added under this key
    UT_hash_handle hh;  // uthash bookkeeping handle; makes this struct hashable
};

struct struct_dict *word_dict = NULL; // global dictionary (hash head used by the HASH_* macros below)

/*
 * Append `name` to the string list stored under key `words` in the global
 * dictionary `word_dict`, creating the entry first when the key is new.
 *
 * words: NUL-terminated key string; a private heap copy is stored for a
 *        newly created entry, so the caller may reuse its buffer.
 * name:  string handed to strlist_add() for the entry's value list.
 *
 * If an allocation fails, a message is written to stderr and the process
 * exits with EXIT_FAILURE (previously this was a NULL dereference).
 */
void add_word(char *words, char *name) {
    struct struct_dict *s = NULL;
    size_t key_len = 0;

    HASH_FIND_STR(word_dict, words, s);  // is the key already in the dictionary?
    if (s == NULL)
    {
        key_len = strlen(words);

        s = malloc(sizeof *s);
        if (s == NULL)
        {
            goto out_of_memory;
        }

        s->key = malloc(key_len + 1);
        if (s->key == NULL)
        {
            goto out_of_memory;
        }
        // Copy the terminating NUL together with the characters.
        memcpy(s->key, words, key_len + 1);

        s->value = strlist_malloc();
        // NOTE(review): assumes strlist_malloc() reports failure by returning
        // NULL -- confirm against ArrayList.h.
        if (s->value == NULL)
        {
            goto out_of_memory;
        }

        HASH_ADD_KEYPTR(hh, word_dict, s->key, key_len, s);
    }
    strlist_add(s->value, name);
    return;

out_of_memory:
    fprintf(stderr, "add_word: out of memory\n");
    exit(EXIT_FAILURE);
}

/*
 * Look up the key `words` in the global dictionary `word_dict`.
 * Returns the matching entry, or NULL when the key is not present.
 */
struct struct_dict *find_word(char *words) {
    struct struct_dict *entry = NULL;

    HASH_FIND_STR(word_dict, words, entry);

    return entry;
}

int main(int argc, char const *argv[]){
    FILE *fp = NULL;
    char buf[BUF_MAX] = {0};
    char buf2[BUF_MAX * 2] = {0};
    int i=0;
    int len_str1 = 0;
    int len_str2 = 0;
    st_strlist *strlist = NULL;
    struct struct_dict *s = NULL;
    char *input_file = NULL;
    int essay_size = 0;
    char *w1 = NULL;
    char *w2 = NULL;
    int seed = 0;
    int num_words = 0;

    // 需要两个参数
    // argv[1] 输入文件名
    // argv[2] 生成文章大小，大于2
    if (argc < 3)
    {
        printf("With 2 arguments\n");
        return;
    }
    input_file = argv[1];
    essay_size = atoi(argv[2]);

    // 读取文件至string list
    strlist = strlist_malloc();
    fp = fopen(input_file, "rb");
    while(fscanf(fp, "%s ", buf) > 0){
        strlist_add(strlist, buf);
    }
    // printf("string list size: %d\n", strlist->size);

    // 生成数据库
    for (i = 0; i < strlist->size - 2; ++i)
    {
        // 先生成key
        len_str1 = strlen(strlist->list[i]);
        len_str2 = strlen(strlist->list[i+1]);
        memcpy(buf2, strlist->list[i], len_str1);
        memcpy(buf2 + len_str1, strlist->list[i+1], len_str2);
        buf2[len_str1 + len_str2] = 0;

        // 向词典中添加key
        add_word(buf2, strlist->list[i+2]);
    }
    num_words = HASH_COUNT(word_dict);

    // // 测试是否在数据库中
    // s = find_word("Helloworld,");
    // if (s)
    // {
    //     printf("hit.\n");
    //     for(i=0; i<s->value->size; i++){
    //         printf("%d, %s\n",i, s->value->list[i]);
    //     }
    // }
    // else{
    //     printf("not hit.\n");
    // }

    // 生成文章
    srand((unsigned)(time(NULL)));  //设置随机数种子
    // srand(111111);
    seed = rand() % (strlist->size -2);
    w1 = strlist->list[seed];
    w2 = strlist->list[seed + 1];
    printf("%s ", w1);
    for (i = 0; i < essay_size; ++i)
    {
        printf("%s ", w2);
        len_str1 = strlen(w1);
        len_str2 = strlen(w2);
        memcpy(buf2, w1, len_str1);
        memcpy(buf2 + len_str1, w2, len_str2);
        buf2[len_str1 + len_str2] = 0;
        s = (find_word(buf2));
        if (s == NULL)
        {
            printf("\n***\n End of input file.\n***\n");
            return;
        }
        strlist = s->value;
        seed = rand() % (strlist->size);        
        w1 = w2;
        w2 = strlist->list[seed];
    }
    printf("\n");
    // free
    strlist_free(strlist);
}